ModelBuilder.sanitize   A
last analyzed

Complexity

Conditions 3

Size

Total Lines 6
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 4
dl 0
loc 6
rs 10
c 0
b 0
f 0
cc 3
1
/* eslint-disable no-param-reassign */
2
import { start, end, indexToPosition, toHash, maxLettersSymbol  } from '../src/utils/hashUtils';
3
4
// Hash of the `end` marker imported from hashUtils, computed once at module load.
// buildMarkov records it as the successor when no further token follows.
const endHash = toHash(end);
5
// Hash of the `start` marker imported from hashUtils, computed once at module load.
// collectHashes returns it as the only state for the leading `null` entry of a word.
const startHash = toHash(start);
6
7
/**
 * Reads `key` from `target` only when it is an own property.
 *
 * Models are plain objects keyed by user-supplied text, so a bare `target[key]`
 * would also return inherited members (for example `constructor`).
 *
 * @param {Object} target - object to read from
 * @param {string} key - property name
 * @returns {*} the own value, or `undefined` when `key` is not an own property
 */
function readOwn(target, key) {
    return Object.prototype.hasOwnProperty.call(target, key) ? target[key] : undefined;
}

/**
 * Builds weighted lookup models from datasets of `{ word, weight }` items.
 *
 * Every occurrence contributes `Math.log(1 + weight)` to its counter; the
 * counters are turned into shares of their total by `normalize` (markov
 * models) or directly inside `buildStatic`.
 */
export default class ModelBuilder {
    /**
     * Lists the model keys for the token at `index`.
     *
     * For each look-back length (up to `maxLettersSymbol` tokens, and never
     * past the first token) the joined symbol ending at `index` is emitted,
     * immediately followed by `toHash({ symbol, position })`.
     *
     * @param {?string} letter - current token, or `null` for the word start
     * @param {string[]} sanitized - tokens of the word
     * @param {number} index - position of `letter` inside `sanitized`
     * @returns {Array} `[ startHash ]` for the word start, otherwise
     *     alternating symbol / hash entries, shortest symbol first
     */
    collectHashes(letter, sanitized, index) {
        if (letter === null) {
            return [ startHash ];
        }

        const hashes = [];
        // A negative slice start is counted from the END of the array, which
        // used to re-emit (and double count) symbols for tokens close to the
        // beginning of short words. Cap the look-back at the first token.
        const depth = Math.min(maxLettersSymbol, index + 1);

        for (let limit = 0; limit < depth; limit++) {
            const symbol = sanitized.slice(index - limit, index + 1).join('');

            if (symbol.length > 0) {
                hashes.push(
                    symbol,
                    toHash({
                        symbol,
                        position : indexToPosition(index)
                    })
                );
            }
        }

        return hashes;
    }

    /**
     * Applies the requested case conversion to a word.
     *
     * @param {string} word - text to convert
     * @param {string} [caseType] - `'LOWER'` or `'UPPER'`; any other value
     *     leaves the word untouched
     * @returns {string} converted word
     */
    sanitize(word, caseType) {
        if (caseType === 'LOWER') {
            return word.toLowerCase();
        }

        if (caseType === 'UPPER') {
            return word.toUpperCase();
        }

        return word;
    }

    /**
     * Splits a word into tokens using `tokens.delim`.
     *
     * NOTE(review): `sanitize` is called without a case type, so it returns
     * the word unchanged here - confirm whether a case setting from the token
     * config was meant to be forwarded.
     *
     * @param {string} word - word to split
     * @param {Object} tokens - token config; only `delim` is read
     * @returns {string[]} tokens of the word
     */
    getTokens(word, tokens) {
        return this.sanitize(word).split(tokens.delim);
    }

    /**
     * Accumulates transition weights for every dataset word into `model`.
     *
     * For each position of `[ null, ...tokens ]`, every key returned by
     * `collectHashes` gets `Math.log(1 + weight)` added to the counter of the
     * following token (`endHash` when there is no following token).
     *
     * @param {Iterable<{word: string, weight: number}>} dataset - source items
     * @param {Object} model - accumulator, mutated in place
     * @param {Object} tokenConfig - forwarded to `getTokens`
     * @returns {Object} the same `model` instance
     */
    buildMarkov(dataset, model, tokenConfig) {
        for (const { word, weight } of dataset) {
            const sanitized = this.getTokens(word, tokenConfig);
            const array = [ null, ...sanitized ];
            const increment = Math.log(1 + weight);

            for (let index = 0; index < array.length; index++) {
                const nextSymbol = array[index + 1] || endHash;
                const hashes = this.collectHashes(array[index], sanitized, index - 1);

                for (const hash of hashes) {
                    // Own-property lookups: a key such as `constructor` must
                    // start a fresh map instead of resolving to an inherited
                    // member of Object.prototype.
                    let map = readOwn(model, hash);

                    if (!map) {
                        map = {};
                        model[hash] = map;
                    }

                    map[nextSymbol] = (readOwn(map, nextSymbol) || 0) + increment;
                }
            }
        }

        return model;
    }

    /**
     * Converts the accumulated weights of every state into shares of the
     * state's total.
     *
     * States with at most one successor are removed from the model.
     *
     * @param {Object} model - markov model, mutated in place
     * @returns {Object} the same `model` instance
     */
    normalize(model) {
        for (const hash of Object.keys(model)) {
            const map = model[hash];
            const keys = Object.keys(map);

            if (keys.length <= 1) {
                delete model[hash];
                // The state is gone - nothing left to normalise.
                continue;
            }

            const sum = Object.values(map).reduce((a, b) => a + b, 0);

            // A zero total (all weights 0) would turn every share into NaN;
            // leave such a state untouched.
            if (sum === 0) {
                continue;
            }

            for (const key of keys) {
                map[key] = map[key] / sum;
            }
        }

        return model;
    }

    /**
     * Accumulates per-word weights into `model` and normalises them so that
     * the values add up to 1.
     *
     * NOTE(review): `tokenConfig` is passed to `sanitize` as its case type, so
     * case folding only happens when the caller supplies the string `'LOWER'`
     * or `'UPPER'` here - confirm against callers.
     *
     * @param {Iterable<{word: string, weight: number}>} dataset - source items
     * @param {Object} model - accumulator, mutated in place
     * @param {*} tokenConfig - forwarded to `sanitize` as `caseType`
     * @returns {Object} the same `model` instance
     */
    buildStatic(dataset, model, tokenConfig) {
        for (const { word, weight } of dataset) {
            const sanitized = this.sanitize(word, tokenConfig);
            // Own-property lookup so that words colliding with inherited
            // members (e.g. `constructor`) are counted from zero.
            const old = readOwn(model, sanitized) || 0;

            model[sanitized] = old + Math.log(1 + weight);
        }

        const sum = Object.values(model).reduce((a, b) => a + b, 0);

        // Dividing by a zero total would fill the model with NaN.
        if (sum !== 0) {
            for (const key of Object.keys(model)) {
                model[key] = model[key] / sum;
            }
        }

        return model;
    }
}
105